; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=SSE41

; Integer add of two doubles reinterpreted as <2 x i32>; the expected output is a lone paddd.
define double @test1_add(double %A, double %B) {
; SSE41-LABEL: test1_add:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x i32>
  %rhs = bitcast double %B to <2 x i32>
  %sum = add <2 x i32> %lhs, %rhs
  %res = bitcast <2 x i32> %sum to double
  ret double %res
}

; Integer add of two doubles reinterpreted as <4 x i16>; the expected output is a lone paddw.
define double @test2_add(double %A, double %B) {
; SSE41-LABEL: test2_add:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <4 x i16>
  %rhs = bitcast double %B to <4 x i16>
  %sum = add <4 x i16> %lhs, %rhs
  %res = bitcast <4 x i16> %sum to double
  ret double %res
}

; Integer add of two doubles reinterpreted as <8 x i8>; the expected output is a lone paddb.
define double @test3_add(double %A, double %B) {
; SSE41-LABEL: test3_add:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <8 x i8>
  %rhs = bitcast double %B to <8 x i8>
  %sum = add <8 x i8> %lhs, %rhs
  %res = bitcast <8 x i8> %sum to double
  ret double %res
}

; Integer subtract of two doubles reinterpreted as <2 x i32>; the expected output is a lone psubd.
define double @test1_sub(double %A, double %B) {
; SSE41-LABEL: test1_sub:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psubd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x i32>
  %rhs = bitcast double %B to <2 x i32>
  %diff = sub <2 x i32> %lhs, %rhs
  %res = bitcast <2 x i32> %diff to double
  ret double %res
}

; Integer subtract of two doubles reinterpreted as <4 x i16>; the expected output is a lone psubw.
define double @test2_sub(double %A, double %B) {
; SSE41-LABEL: test2_sub:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psubw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <4 x i16>
  %rhs = bitcast double %B to <4 x i16>
  %diff = sub <4 x i16> %lhs, %rhs
  %res = bitcast <4 x i16> %diff to double
  ret double %res
}

; Integer subtract of two doubles reinterpreted as <8 x i8>; the expected output is a lone psubb.
define double @test3_sub(double %A, double %B) {
; SSE41-LABEL: test3_sub:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psubb %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <8 x i8>
  %rhs = bitcast double %B to <8 x i8>
  %diff = sub <8 x i8> %lhs, %rhs
  %res = bitcast <8 x i8> %diff to double
  ret double %res
}

; Integer multiply of two doubles reinterpreted as <2 x i32>; the expected output is a lone pmulld.
define double @test1_mul(double %A, double %B) {
; SSE41-LABEL: test1_mul:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x i32>
  %rhs = bitcast double %B to <2 x i32>
  %prod = mul <2 x i32> %lhs, %rhs
  %res = bitcast <2 x i32> %prod to double
  ret double %res
}

; Integer multiply of two doubles reinterpreted as <4 x i16>; the expected output is a lone pmullw.
define double @test2_mul(double %A, double %B) {
; SSE41-LABEL: test2_mul:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <4 x i16>
  %rhs = bitcast double %B to <4 x i16>
  %prod = mul <4 x i16> %lhs, %rhs
  %res = bitcast <4 x i16> %prod to double
  ret double %res
}

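; There is no legal ISD::MUL with type MVT::v16i8. The expected output instead
; zero-extends both operands' bytes to 16-bit lanes (pmovzxbw), multiplies them
; with pmullw, and gathers the low byte of each product with pshufb.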
; Integer multiply of two doubles reinterpreted as <8 x i8>; unlike the wider
; element types, the expected output is a multi-instruction sequence.
define double @test3_mul(double %A, double %B) {
; SSE41-LABEL: test3_mul:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <8 x i8>
  %rhs = bitcast double %B to <8 x i8>
  %prod = mul <8 x i8> %lhs, %rhs
  %res = bitcast <8 x i8> %prod to double
  ret double %res
}

; Bitwise AND of two doubles reinterpreted as <2 x i32>; the expected output is a lone andps.
define double @test1_and(double %A, double %B) {
; SSE41-LABEL: test1_and:
; SSE41:       # %bb.0:
; SSE41-NEXT:    andps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x i32>
  %rhs = bitcast double %B to <2 x i32>
  %bits = and <2 x i32> %lhs, %rhs
  %res = bitcast <2 x i32> %bits to double
  ret double %res
}

; Bitwise AND of two doubles reinterpreted as <4 x i16>; the expected output is a lone andps.
define double @test2_and(double %A, double %B) {
; SSE41-LABEL: test2_and:
; SSE41:       # %bb.0:
; SSE41-NEXT:    andps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <4 x i16>
  %rhs = bitcast double %B to <4 x i16>
  %bits = and <4 x i16> %lhs, %rhs
  %res = bitcast <4 x i16> %bits to double
  ret double %res
}

; Bitwise AND of two doubles reinterpreted as <8 x i8>; the expected output is a lone andps.
define double @test3_and(double %A, double %B) {
; SSE41-LABEL: test3_and:
; SSE41:       # %bb.0:
; SSE41-NEXT:    andps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <8 x i8>
  %rhs = bitcast double %B to <8 x i8>
  %bits = and <8 x i8> %lhs, %rhs
  %res = bitcast <8 x i8> %bits to double
  ret double %res
}

; Bitwise OR of two doubles reinterpreted as <2 x i32>; the expected output is a lone orps.
define double @test1_or(double %A, double %B) {
; SSE41-LABEL: test1_or:
; SSE41:       # %bb.0:
; SSE41-NEXT:    orps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x i32>
  %rhs = bitcast double %B to <2 x i32>
  %bits = or <2 x i32> %lhs, %rhs
  %res = bitcast <2 x i32> %bits to double
  ret double %res
}

; Bitwise OR of two doubles reinterpreted as <4 x i16>; the expected output is a lone orps.
define double @test2_or(double %A, double %B) {
; SSE41-LABEL: test2_or:
; SSE41:       # %bb.0:
; SSE41-NEXT:    orps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <4 x i16>
  %rhs = bitcast double %B to <4 x i16>
  %bits = or <4 x i16> %lhs, %rhs
  %res = bitcast <4 x i16> %bits to double
  ret double %res
}

; Bitwise OR of two doubles reinterpreted as <8 x i8>; the expected output is a lone orps.
define double @test3_or(double %A, double %B) {
; SSE41-LABEL: test3_or:
; SSE41:       # %bb.0:
; SSE41-NEXT:    orps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <8 x i8>
  %rhs = bitcast double %B to <8 x i8>
  %bits = or <8 x i8> %lhs, %rhs
  %res = bitcast <8 x i8> %bits to double
  ret double %res
}

; Bitwise XOR of two doubles reinterpreted as <2 x i32>; the expected output is a lone xorps.
define double @test1_xor(double %A, double %B) {
; SSE41-LABEL: test1_xor:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x i32>
  %rhs = bitcast double %B to <2 x i32>
  %bits = xor <2 x i32> %lhs, %rhs
  %res = bitcast <2 x i32> %bits to double
  ret double %res
}

; Bitwise XOR of two doubles reinterpreted as <4 x i16>; the expected output is a lone xorps.
define double @test2_xor(double %A, double %B) {
; SSE41-LABEL: test2_xor:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <4 x i16>
  %rhs = bitcast double %B to <4 x i16>
  %bits = xor <4 x i16> %lhs, %rhs
  %res = bitcast <4 x i16> %bits to double
  ret double %res
}

; Bitwise XOR of two doubles reinterpreted as <8 x i8>; the expected output is a lone xorps.
define double @test3_xor(double %A, double %B) {
; SSE41-LABEL: test3_xor:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <8 x i8>
  %rhs = bitcast double %B to <8 x i8>
  %bits = xor <8 x i8> %lhs, %rhs
  %res = bitcast <8 x i8> %bits to double
  ret double %res
}

; Floating-point add of two doubles reinterpreted as <2 x float>; the expected output is a lone addps.
define double @test_fadd(double %A, double %B) {
; SSE41-LABEL: test_fadd:
; SSE41:       # %bb.0:
; SSE41-NEXT:    addps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x float>
  %rhs = bitcast double %B to <2 x float>
  %sum = fadd <2 x float> %lhs, %rhs
  %res = bitcast <2 x float> %sum to double
  ret double %res
}

; Floating-point subtract of two doubles reinterpreted as <2 x float>; the expected output is a lone subps.
define double @test_fsub(double %A, double %B) {
; SSE41-LABEL: test_fsub:
; SSE41:       # %bb.0:
; SSE41-NEXT:    subps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x float>
  %rhs = bitcast double %B to <2 x float>
  %diff = fsub <2 x float> %lhs, %rhs
  %res = bitcast <2 x float> %diff to double
  ret double %res
}

; Floating-point multiply of two doubles reinterpreted as <2 x float>; the expected output is a lone mulps.
define double @test_fmul(double %A, double %B) {
; SSE41-LABEL: test_fmul:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    retq
  %lhs = bitcast double %A to <2 x float>
  %rhs = bitcast double %B to <2 x float>
  %prod = fmul <2 x float> %lhs, %rhs
  %res = bitcast <2 x float> %prod to double
  ret double %res
}

